################################################################################
# Created By:Pietryka
# Creation Date:  2016-12-23
# Purpose: fit models w/ gender interactions on cces panel data
# Questions: mpietryka@fsu.edu
################################################################################

# PREAMBLE =============================================


# LOAD PACKAGES -----------------
library(Amelia)
library(tidyverse)


# LOAD DATA--------------

# Workspace from 'CCES2-Impute.R'
load("Data/CCES2-Impute.Rdata")


# DEFINE FUNCTIONS  =============================================

library(survival)

# FUNCTION TO FIT MODEL ON IMPUTED DATA
felogit_imp <- function(the_formula, the_data) {
  lapply(the_data, function(x){
    clogit(formula = formula(paste0(the_formula, "+ strata(caseid)")),
           data = x)
  })
}


# FUNCTION TO COMBINE ESTIMATES USING RUBIN'S RULES
combine_results <- function(results_list) {
  m_imps <- length(results_list)
  var_names <- results_list[[1]]$coefficients  %>% names()
  b_out  <- sapply(results_list, function(x){x$coefficients})  %>%
    matrix(nrow = m_imps, byrow = TRUE, dimnames = list(NULL, var_names))
  se_out <- sapply(results_list, function(x){summary(x)$coefficients[, 3]}) %>%
    matrix(nrow = m_imps, byrow = TRUE, dimnames = list(NULL, var_names))
  fit_out <- map_df(results_list,~broom::glance(.x))
  ests = mi.meld(q = b_out, se = se_out)
  fit = map_df(fit_out, mean)
  list(q.mi = ests$q.mi, se.mi = ests$se.mi, n = fit$n, aic = fit$AIC, bic = fit$BIC)
}


# Split data by gender --------------------------------------

cces_cleaned_men <- map(cces_cleaned, filter, female == 0)
cces_cleaned_women <- map(cces_cleaned, filter, female == 1)


# DEFINE MODELS   ===========================================================



outcome_names <- c("turnout",
                   "attendmeeting",
                   "polsign",
                   "workcampaign",
                   "donatemoney")
controls_a <- c("collgrad", "contacted", "factor(famincquart)")
controls_b <- c(controls_a,
                "married",
                "countymove_change",
                "homeowner",
                "student",
                "employfull",
                "churchfreq",
                "newsint"
)



formulae <- data_frame(y = outcome_names)  %>%
  mutate(form1 = paste0(y, " ~ ", "parent + factor(year)"))  %>%
  mutate(form2 = paste0(y, " ~ ", "parent + factor(year) +",
                        paste(controls_a, collapse = " + ")))  %>%
  mutate(form3 = paste0(y, " ~ ", "parent + factor(year) +",
                        paste(controls_b, collapse = " + ")))  %>%
  mutate_at(vars(form1, form2, form3), funs(map(., formula, env = globalenv())))  %>%
  gather(equation, form, -y)






# FIT MODELS, COMBINE RESULTS  --------------


# MEN
mods_men     <- list()
combined_men <- list()
for (i in seq_len(nrow(formulae))){
  print(i)
  # FIT MODELS ON EACH IMPUTED DATASET
  mods_men[[i]] <- felogit_imp(the_formula = formulae[i, "form"]$form,
                                   the_data = cces_cleaned_men)
  # COMBINE ESTIMATES  USING RUBIN'S RULES
  combined_men[[i]] <- combine_results(mods_men[[i]])
}


# WOMEN
mods_women     <- list()
combined_women <- list()
for (i in seq_len(nrow(formulae))){
  print(i)
  # FIT MODELS ON EACH IMPUTED DATASET
  mods_women[[i]] <- felogit_imp(the_formula = formulae[i, "form"]$form,
                               the_data = cces_cleaned_women)
  # COMBINE ESTIMATES  USING RUBIN'S RULES
  combined_women[[i]] <- combine_results(mods_women[[i]])
}



results_men <- formulae  %>%
  bind_cols(data_frame(
    coefs = map(combined_men, 1),
    ses   = map(combined_men, 2),
    gender = "Men"
  ))


results_women <- formulae  %>%
  bind_cols(tibble(
    coefs = map(combined_women, 1),
    ses   = map(combined_women, 2),
    gender = "Women"
  ))

cces_results_gender <- results_men  %>%
  bind_rows (results_women) %>%
  mutate(beta = map_dbl(coefs, ~ .x[colnames(.x) == "parent"]))  %>%
  mutate(se = map_dbl(ses, ~ .x[colnames(.x) == "parent"]))  %>%
  mutate(lb   = beta - 1.96 * se)   %>%
  mutate(ub   = beta + 1.96 * se)  %>%
  mutate(lab = recode(equation,
                      form1 = "1. Baseline Model",
                      form2 = "2. Intermediate Model",
                      form3 = "3. Full Model"))  %>%
  mutate(lab = case_when(
    y != "attendmeeting" ~ lab,
    lab == "1. Baseline Model" ~ paste0(lab, ": No Controls"),
    lab == "2. Intermediate Model" ~ lab,
    lab == "3. Full Model" ~ paste0(lab, ": All Controls")))  %>%
  mutate(lab = forcats::fct_rev(lab))  %>%
  mutate(y_lab = recode(y,
                        'turnout' = "Turnout",
                        'attendmeeting' = "Attend Meeting",
                        'workcampaign' = "Work for Campaign",
                        'donatemoney' = "Donate Money",
                        'polsign' = "Display Sign")
  ) %>%
  mutate(dot_type = ifelse( sign(lb) == sign(ub),
                            "statistically significant",
                            "statistically insignificant")
  )








# CREATE PLOT ==========================================




the_dodge <- 0.5
data <- cces_results_gender  %>% filter(y == "turnout")
gender_plot <- ggplot(data, aes(x = forcats::fct_rev(equation),
                             y = beta,
                             fill = dot_type,
                             group= gender,
                             shape = gender,
                             color = gender,
                             label = gender)) +
  geom_hline(yintercept = 0, linetype = 2, size = 1) +
  geom_pointrange(aes(ymin = lb, ymax = ub),
                  position = position_dodge(width = the_dodge),
                  fatten = 8, linewidth = 2) +
  theme_minimal(base_size = 12) +
  expand_limits(y = -1.8) +
  geom_text(
    aes(y = ub ),
            position = position_dodge(width = the_dodge),
    hjust = 0,
            size = 4) +
  coord_flip() +
  xlab(NULL) +
  ylab("Estimate (log odds)") +
  expand_limits(y = c(-1.8, 1.8)) +
  facet_wrap(~ fct_rev(lab), ncol = 1, as.table = TRUE, scales = "free_y") +
  theme(plot.subtitle = element_text(colour = "gray50")) +
  theme(plot.caption = element_text(colour = "gray50")) +
  theme(legend.position = "none") +
  theme(
    axis.text.y = element_blank(),
    panel.grid.major.y = element_blank()
    ) +
  theme(text = element_text(size = 16)) +
  theme(strip.text = element_text(hjust = 0)) +
  scale_fill_manual(values = c("white", "black")) +
  scale_color_grey(start = 0.2, end = 0.5) +
  scale_shape_manual(values = c(15, 17))



graphics.off()
windows(3, 4)
gender_plot




# SAVE ===============================================


ggsave("Figures/CCES-Fixed-Effects-Gender.tiff", compression = "lzw")






